# Aggregate the investment themes collected from multiple sources and de-duplicate them.

import pandas as pd
import Core.MongoDB as db

# Load the raw investment-theme table (GBK-encoded CSV) and report how many
# rows it contains before de-duplication.
theme = pd.read_csv('D:/invest_theme.csv', encoding='GBK')
print(theme.shape[0])

# Collapse rows that share the same 'tag' value; when a tag occurs several
# times, the last occurrence in file order is the one that is kept.
theme_dropduplicate = theme.drop_duplicates(subset=['tag'], keep='last')
print(theme_dropduplicate.head(5))

# Persist the de-duplicated table. The index is written too (pandas default),
# so the output file gains a leading unnamed column.
theme_dropduplicate.to_csv('D:/theme_dropduplicate.csv', encoding='GBK')




# Open the project's MongoDB wrapper (arguments look like host and port —
# TODO confirm against Core.MongoDB).
database = db.MongoDB("10.13.144.119", "27017")
# Manual switch: uncomment to remove the existing "投资主题" tags before
# re-importing.
#database.delete("Label", "Tag", query={"class": "投资主题"})

# Drop missing tags first: empty CSV cells are read as NaN, and
# drop_duplicates keeps one such row, which would otherwise be upserted as a
# bogus tag whose name is NaN.
tag_list = theme_dropduplicate['tag'].dropna().tolist()
print(tag_list)
for tag in tag_list:
    print(tag)
    # Document stored for this tag; the (name, class) pair passed as the third
    # argument is presumably the match filter, so re-running the script should
    # update rather than duplicate entries — verify against the wrapper.
    correlationDoc = {
        "class": "投资主题",
        "source": "caixue",
        "name": tag,
    }
    database.upsert("Label", "Tag", {"name": tag, "class": "投资主题"}, correlationDoc)
